* Session setup.
* Paging is switched off so the do-file runs without pausing for --more--.
set more off 
* Empty the workspace (data first, then matrices) before raising the
* variable limit; the limit is set only after memory has been cleared.
clear all 
clear matrix
set maxvar 15000 


**********************************************************
* PART 1: CENSUS FATHERS
**********************************************************
    *---------------------------------------*
    * IDENTIFY FATHERS AND LIMIT SAMPLE
    *---------------------------------------*
    /* For each Census decade 19d0 (d = 2,...,8): find men who are recorded as
       the co-resident father (poploc) of at least one child under 18 born in
       the preceding decade, attach occupation-based income scores, and save
       them in the tempfile `fathers_19d0'. Fathers may be observed in either
       19d0 or the following Census year. */
    forvalues d = 2/8 {

        * Decade digits immediately before and after the current one
        local prev_dec = `d' - 1
        local next_dec = `d' + 1

        * Load the raw extract and keep the two adjacent Census years
        use "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear 
        keep if inlist(year, 19`d'0, 19`next_dec'0)
        tab year

        * Snapshot of everyone in those two years; fathers are pulled from it below
        tempfile census_pair
        save `census_pair'

    /* Children: younger than 18 and born in the decade before 19d0
       (e.g., for d=3 these are children born in the 1920s). */
        keep if age<18 & birthyr>=19`prev_dec'0 & birthyr<19`d'0
        keep year serial poploc age
        tab age

    * Exclude children without a father in the house (poploc is 0 or missing)
        drop if poploc==0 | poploc==.

    /* Collapse to one row per father id. A father with several
       qualifying children would otherwise appear more than once. */
        drop age
        bysort year serial poploc: drop if _n>1
        rename poploc pernum

        tempfile father_ids
        save `father_ids'

    * Pull the fathers' own person records; only matched records are kept
        use `census_pair', clear
        merge 1:1 year serial pernum using `father_ids', keep(match) nogenerate
        assert sex==1

    * Sample restrictions: ages 30 to 50, white or black
        keep if inrange(age,30,50)
        keep if inlist(race,1,2)

    * Race dummies
        gen black = (race==2)
        gen white = !black

    * Dummy: Census father resides in the South (left missing when region>=90)
        gen south_merge = (inrange(region,31,34)) if region<90
        tab region south_merge,m

    * Rescale the Census person weight so that its sample mean is 1
        sum perwt
        local perwt_mean = r(mean)

        replace perwt = perwt/`perwt_mean' 

    *---------------------------------------*
    /* Crosswalk Census occupations to 
       ANES occupations */
    *---------------------------------------*
        sort occ1950 
        replace occ1950=. if occ1950>=980

    * Occupation codes 200-290 are split by self-employment (classwkr==1 gets +1000)
        replace occ1950=occ1950+1000 if inrange(occ1950,200,290) & classwkr==1

    * Crosswalk: every father must find a row; crosswalk-only rows are discarded
        merge m:1 occ1950 using "$Crosswalks/Crosswalk_1950Census_toANES.dta"
        assert _merge!=1
        keep if _merge==3
        drop _merge

        rename occ1950ej fatheroccej

    *---------------------------------------*
    * Merge in various income scores
    *---------------------------------------*

    * Keep only people with all three merge keys present
        keep if fatheroccej!=. & race!=. & south_merge!=.

    * Census (occupation x race x south) 
        sort fatheroccej race south_merge
        merge m:1 fatheroccej race south_merge using "$CensusData/output/IncomeScores_Coarsened_byrace_bysouth.dta"
        assert _merge!=1
        keep if _merge==3
        drop _merge

    * 1936 Survey (occupation x race x south)
        merge m:1 fatheroccej race south_merge using "$Survey1936/output/ConsumptionSurvey_1936_IncomeScores.dta"
        assert _merge!=1
        keep if _merge==3
        drop _merge

    /* Mix 1936 Survey and 1940 Census income score: occupation codes 21 and 81
       take the 1936 survey value, every other code takes the 1940 Census value. */
        gen father_HHinc_1936fix = cond(inlist(fatheroccej,21,81), avg_totfaminc_1936, avg_HHinc_1940_byrace_bysouth)
        label var father_HHinc_1936fix "Father baseline income score, 1936 farm and self-emp"

    *-----------------------*
    * Blended income score 
    *-----------------------*
    /* The father receives the predicted income of the Census year in which
       he is observed: the 1936-adjusted score through 1940, the 1950 score
       in 1950, and the year-specific score for 1960 through 1990. */
        gen father_income_baseline = father_HHinc_1936fix if year<=1940
        replace father_income_baseline = avg_inctot_1950_byocc_byr_bys if year==1950
        foreach yr of numlist 1960(10)1990 {
            replace father_income_baseline = avg_HHinc_`yr'_byocc_byr_bys if year==`yr'
        }

        label var father_income_baseline "Father income, baseline measure"

    * Log income
        gen log_father_baseline = ln(father_income_baseline)
        label var log_father_baseline "Logged father income, baseline measure" 

    * Flag the rows as fathers and store them under a decade-specific tempfile
        gen father=1
        tempfile fathers_19`d'0
        save `fathers_19`d'0'

    }
         
*------------------------------------------------------------------------------**
*------------------------------------------------------------------------------**

**********************************************************
* PART 2: CENSUS CHILDREN
**********************************************************

    /* Build the pooled adult-children source ONCE.
       The raw Census extract, the ACS append, and the nativity / age / race
       restrictions do not depend on the decade being processed, so they are
       applied here instead of being repeated on every loop iteration. The
       restrictions commute with the per-decade year filter, so each decade's
       sample is the same as if they were applied inside the loop. */
        use "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear 
        append using "$CensusData/input/Census_2019_ACS_raw.dta" 
        replace year=2020 if year==2019   // the 2019 ACS is stored under year 2020

    * Only the Census years used below (1950, ..., 2020)
        keep if inlist(year,1950,1960,1970,1980,1990,2000,2010,2020)

    * Keep US-born individuals
        keep if bpl<100 

    * Keep black and white individuals ages 30 to 50 
        keep if inrange(age,30,50) 
        keep if race==1 | race==2 

    * Race dummies
        gen black = race==2
        gen white = black==0

        tempfile children_pool
        save `children_pool'

    /* One pass per Census decade. The loop digit d is the tens digit of the
       year: 5-9 are 1950-1990 and 0-2 are 2000-2020. Output tempfiles are
       sons_5, ..., sons_12 (2000 -> 10, 2010 -> 11, 2020 -> 12). */
    foreach d in 5 6 7 8 9 0 1 2 {

    * Running decade index (5,...,12) and the matching Census year
        local numb = cond(`d'<5, `d'+10, `d')
        local yr = 1900 + 10*`numb'

        use `children_pool', clear

    * Keep desired Census decade
        keep if year==`yr'

        if "`d'"=="5" {
        /* 1950: income was only asked of sample-line people, so keep those,
           switch to the sample-line weight, and use personal total income
           in place of family income. */
            keep if slwt>0
            replace perwt = slwt //use sample line weight in 1950

            drop ftotinc
            /* NOTE(review): 9999999 is treated as the N/A code, mirroring the
               handling of ftotinc in the other branch -- confirm against the
               IPUMS codebook for inctot. Without this, an N/A code would
               enter the sample as a very large positive income. */
            replace inctot=. if inctot==9999999 
            replace inctot=0 if inctot<0
            clonevar ftotinc = inctot //for later
        }
        else {  
        * All other years: blank the 9999999 code and floor negative income at zero
            replace ftotinc=. if ftotinc==9999999 
            replace ftotinc=0 if ftotinc<0
            sum ftotinc
        }

    * Log income (drop zeros and missing)
        keep if ftotinc>0 & ftotinc<.
        gen log_income_c = ln(ftotinc)

    * Fix Census weight to have mean 1 within this decade's sample
        sum perwt
        local weight_avg = r(mean)

        replace perwt = perwt/`weight_avg' 

    * Save
        gen child =1 
        tempfile sons_`numb'
        save `sons_`numb''
    }

**------------------------------------------------------------------------------**
**------------------------------------------------------------------------------**

*************************************************
* PART 3: CALCULATE THIRD TERM OF DECOMPOSITION *
*************************************************

* Lists for looping
    local women_list "2 3 4 5 6 7"
    local men_list "1 2 3 4 5 6 7"

/* Notes: (1) Cohort index x stands for the birth decade 19x0. The 1910
              cohort (x=1) is left out of the women's list because 1950
              inctot cannot be accurately measured for them in the 1950
              Census due to the sample-line person restriction.
          (2) For the 1970 cohort, the later observation comes from the
              2019 ACS (stored under year 2020 in Part 2) rather than
              from a 2020 Census.
          (3) For cohort x, fathers come from `fathers_19(x+1)0' and adult
              children from `sons_(x+4)' and `sons_(x+5)'.
          (4) The rank version uses egen's xtile() function, which is not
              part of official Stata and must be installed by the user. */
    foreach zed in log rank {
        foreach group in men women {

        * Sex code of the adult children kept for this group
            if "`group'"=="men" local child_sex 1
            if "`group'"=="women" local child_sex 2

            foreach x in ``group'_list' {

                display "`zed', `group', `x'"

                local x_plus1 = `x'+1
                local x_plus4 = `x'+4
                local x_plus5 = `x'+5

            * Stack the cohort's fathers with the two Census waves of adult children
                use `fathers_19`x_plus1'0', clear
                append using `sons_`x_plus4''
                append using `sons_`x_plus5''

            * Father rows have no child flag after the append; set it to 0
                replace child=0 if father==1
                tab age child

            * Restrict sample to appropriate fathers and children for each cohort 
                keep if father==1 | (child==1 & birthyr>=19`x'0 & birthyr<19`x_plus1'0)
                tab year

            * Restrict gender (fathers are always kept)
                keep if father==1 | (child==1 & sex==`child_sex')

            * Rank income: weighted percentiles within age, separately by generation
                if "`zed'"=="rank" {  
                    egen rank_income_c = xtile(ftotinc) if child==1, by(age) nq(100) weight(perwt)
                    sum rank_income_c if child==1 [aw=perwt], d

                    egen rank_income_father = xtile(log_father_baseline) if child==0, by(age) nq(100) weight(perwt)
                    sum rank_income_father if child==0 [aw=perwt], d 
                }

            **---------------------------------

            /* Reconstruct weight of fathers so that 
               share black is same as share of children.
               Within each generation, each race's weight is scaled by
               (children's race share) / (that generation's race share);
               for children the factor is therefore 1. */    
                sum black if child==1 [aw=perwt]
                local share_b = `r(mean)'
                local share_w = 1-`r(mean)'

                gen wgt_temp=.

                foreach y in child father {
                    sum white if `y'==1 [aw=perwt]
                    replace wgt_temp= `share_w' / `r(mean)' if `y'==1 & white==1

                    sum black if `y'==1 [aw=perwt]
                    replace wgt_temp= `share_b'  / `r(mean)' if `y'==1 & black==1
                }
                gen weight = wgt_temp * perwt

            * Store race shares 
                sum black if child==1 [aw=weight]   
                local pb = `r(mean)'
                local pw = 1-`pb'

            **---------------------------------

            * Outcome variables for this specification (logs or ranks)
                if "`zed'"=="log" {
                    local yvar_child "log_income_c"
                    local yvar_father "log_father_baseline"
                }
                if "`zed'"=="rank" {
                    local yvar_child "rank_income_c"
                    local yvar_father "rank_income_father"
                }

            * Children's income: overall mean and means by race
                display "Children's income"
                sum `yvar_child' if child==1 [aw=weight]
                local yc = `r(mean)'
                sum `yvar_child' if child==1 & black==0 [aw=weight]
                local yc_w = `r(mean)'
                sum `yvar_child' if child==1 & black==1 [aw=weight]
                local yc_b = `r(mean)'

            * Parent generation income: overall mean and variance, means by race
                display "Father's income"
                sum `yvar_father' if father==1 [aw=weight], d
                local yp = `r(mean)'
                local var_yp = `r(Var)'
                sum `yvar_father' if father==1 & black==0 [aw=weight]
                local yp_w = `r(mean)'
                sum `yvar_father' if father==1 & black==1 [aw=weight]
                local yp_b = `r(mean)'

            /* Put pieces of decomposition terms together:
               numerator = pw*yc_w*yp_w + pb*yc_b*yp_b - yc*yp
               final term = numerator / Var(father outcome) */
                display "Putting pieces together"
                local first_term = (`pw'*`yc_w'*`yp_w') 
                display `first_term'
                local second_term = (`pb'*`yc_b'*`yp_b')
                display `second_term'
                local third_term = `yc'*`yp'
                display `third_term'

                local numerator_`x' = `first_term'+`second_term'-`third_term'
                display "Numerator: `numerator_`x''"
                display `var_yp'

                local final_term_`x' = (`numerator_`x'') / `var_yp'
                display "Final term: `final_term_`x''"

                local var_yp_`x' = `var_yp'

            }

        * Echo the stored estimates for every cohort in this group
            foreach x in ``group'_list' {
                display `numerator_`x''
                display `final_term_`x''
                display `var_yp_`x''
            }

        /* Save estimates in a fresh dataset with one row per cohort index
           (row x <-> birth decade 1900+10x), instead of writing them into
           the first rows of whatever microdata was left in memory. Rows for
           cohorts not in this group's list keep missing values and are
           excluded from the figure by the numerator!=. condition. */
            local n_rows = 0
            foreach x in ``group'_list' {
                if `x' > `n_rows' local n_rows = `x'
            }

            clear
            set obs `n_rows'

            gen numb = _n
            gen decade= 1900+(_n*10)
            gen numerator=.
            gen third_term=.
            gen var_yp =.

            foreach x in ``group'_list' {
                replace numerator = `numerator_`x'' if _n==`x'
                replace third_term = `final_term_`x'' if _n==`x'
                replace var_yp= `var_yp_`x'' if _n==`x'
            }
            keep numerator decade third_term numb var_yp

        * Figures: x-axis starts at the first cohort available for the group
            if "`group'"=="men" local numb1 "1910"
            if "`group'"!="men" local numb1 "1920"

            if "`group'"=="men" local numb2 "1905"
            if "`group'"!="men" local numb2 "1915"

        * y-axis layout differs between the log and rank specifications
            if "`zed'"=="log" local yaxis_opts "ylabel(0(0.1)0.4) yscale(range(0 0.4))"
            if "`zed'"=="rank" local yaxis_opts "ylabel(0.04(0.02)0.12) yscale(range(0.04 0.12))"

            twoway (connect third_term decade if numerator!=., lcolor(navy) mcolor(navy) lpat(solid) msymbol(square)), ///
            ytitle("Third term" " ") xtitle(" " "Birth cohort")  xlabel(`numb1'(10)1970) xscale(range(`numb2' 1975)) `yaxis_opts'
            graph export "$Mydirectory2/appendix_a/decomp_thirdterm_census_`group'_`zed'.pdf", as(pdf) replace

        }
    }

